1/14/2019

Outline

  • R package igraph
    • Get network from files (edgelist, matrix, dataframe)
    • Visualization
      • Plotting parameters
      • Layouts
    • Network and node descriptions

Dataset

igraph

Preparation

#install.packages("igraph")
#install.packages("igraphdata")
library(igraph)
library(igraphdata)

#install.packages("dplyr")
#install.packages("tidyr")
#install.packages("stringr")

1. Get network from files

Creating network

1. Get network from files

  • graph_from_adjacency_matrix()
  • graph_from_edgelist()
  • graph_from_data_frame()

1.1 graph_from_adjacency_matrix()

Useful for building a graph from a small adjacency matrix.

Real-world networks are usually large and sparse, so they are typically stored as an edgelist rather than as a full matrix.

Binary matrix:

# Fix the RNG so the sampled matrix is reproducible.
set.seed(2)
# Draw 100 values from {0, 1} with P(1) = 0.1 (Bernoulli draws) and arrange
# them into a 10-column (10 x 10) adjacency matrix. `ncol` is spelled out in
# full instead of relying on partial argument matching (`nc`).
adjm <- matrix(
  sample(0:1, 100, replace = TRUE, prob = c(0.9, 0.1)),
  ncol = 10
)
adjm
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
##  [1,]    0    0    0    0    1    0    0    0    0     1
##  [2,]    0    0    0    0    0    0    0    0    0     0
##  [3,]    0    0    0    0    0    0    0    0    0     0
##  [4,]    0    0    0    0    0    1    0    0    0     0
##  [5,]    1    0    0    0    1    0    0    0    0     0
##  [6,]    1    0    0    0    0    0    0    0    0     0
##  [7,]    0    1    0    0    1    0    0    0    1     0
##  [8,]    0    0    0    0    0    1    0    0    0     0
##  [9,]    0    0    1    0    0    0    0    0    0     0
## [10,]    0    0    0    0    0    0    0    0    0     0
# With the default mode the matrix is read as a directed graph.
g1 <- graph_from_adjacency_matrix(adjm)
set.seed(1)  # same seed before every plot so the drawings are comparable
plot(g1)

# Same matrix, read as an undirected graph.
g2 <- graph_from_adjacency_matrix(adjm, mode = "undirected")
set.seed(1)
plot(g2)

# diag = FALSE ignores the diagonal, i.e. drops self-loops (which often make
# no sense in real-world networks).
g3 <- graph_from_adjacency_matrix(adjm, mode = "undirected", diag = FALSE)
set.seed(1)
plot(g3)

Sparse matrix:

# Indexing a graph with empty brackets returns its adjacency matrix in sparse
# form (see the printed class below).
adjms <- g1[]
adjms
## 10 x 10 sparse Matrix of class "dgCMatrix"
##                          
##  [1,] . . . . 1 . . . . 1
##  [2,] . . . . . . . . . .
##  [3,] . . . . . . . . . .
##  [4,] . . . . . 1 . . . .
##  [5,] 1 . . . 1 . . . . .
##  [6,] 1 . . . . . . . . .
##  [7,] . 1 . . 1 . . . 1 .
##  [8,] . . . . . 1 . . . .
##  [9,] . . 1 . . . . . . .
## [10,] . . . . . . . . . .
# The sparse matrix is passed to the same constructor as the dense one.
g4 <- graph_from_adjacency_matrix(adjms)
set.seed(1)
plot(g4)

Weighted matrix

set.seed(1)
# Each entry is 0 with probability 0.9, otherwise one of the weights 1-5
# (probability 0.02 each). `ncol` is spelled out in full instead of relying on
# partial argument matching (`nc`).
adjmw <- matrix(
  sample(0:5, 100, replace = TRUE, prob = c(0.9, rep(0.02, 5))),
  ncol = 10
)
adjmw
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
##  [1,]    0    0    3    0    0    0    2    0    0     0
##  [2,]    0    0    0    0    0    0    0    0    0     0
##  [3,]    0    0    0    0    0    0    0    0    0     0
##  [4,]    2    0    0    0    0    0    0    0    0     0
##  [5,]    0    0    0    0    0    0    0    0    0     0
##  [6,]    0    0    0    0    0    0    0    0    0     0
##  [7,]    4    0    0    0    0    0    0    0    0     0
##  [8,]    0    1    0    0    0    0    0    0    0     0
##  [9,]    0    0    0    0    0    0    0    0    0     0
## [10,]    0    0    0    0    0    0    0    5    0     0
# weighted = TRUE keeps the non-zero matrix entries as the "weight" edge
# attribute (see the printed graph and E(g5)$weight below).
g5 <- graph_from_adjacency_matrix(adjmw, weighted = TRUE)
set.seed(1)
plot(g5)

g5
## IGRAPH 9270740 D-W- 10 6 -- 
## + attr: weight (e/n)
## + edges from 9270740:
## [1]  1->3  1->7  4->1  7->1  8->2 10->8
E(g5)$weight
## [1] 3 2 2 4 1 5

Named matrix

# Label rows and columns A-J in one step.
# NOTE(review): presumably these dimnames become the vertex names of g6 -
# confirm with V(g6)$name.
dimnames(adjmw) <- list(LETTERS[1:10], LETTERS[1:10])
g6 <- graph_from_adjacency_matrix(adjmw, weighted = TRUE)
set.seed(1)
plot(g6)

1.2 graph_from_edgelist()

Most network datasets are stored as edgelists. The input is a two-column matrix in which each row defines one edge.

# Load the edge table; keep character columns as plain strings.
gotdf <- read.csv("gotstark_lannister.csv", stringsAsFactors = FALSE)
head(gotdf, n = 5)
##   X     Source           Target       Type weight book source.family
## 1 1 Arya-Stark     Benjen-Stark Undirected      3    1         Stark
## 2 2 Arya-Stark       Bran-Stark Undirected     14    1         Stark
## 3 3 Arya-Stark    Catelyn-Stark Undirected      5    1         Stark
## 4 4 Arya-Stark Cersei-Lannister Undirected     12    1         Stark
## 5 5 Arya-Stark          Desmond Undirected      3    1         Stark
##   target.family
## 1         Stark
## 2         Stark
## 3         Stark
## 4     Lannister
## 5          <NA>
library(dplyr)
library(tidyr)
# Turn the weighted edge table into a plain edge list: every (Source, Target)
# pair is repeated `weight` times. seq_len() is used instead of 1:weight so
# that a weight of 0 contributes no rows (1:0 counts down and would yield two).
# The helper column `edge` only drives the expansion and is dropped again.
gotdf.el <- gotdf %>%
  select(Source, Target, weight) %>%
  group_by(Source, Target) %>%
  expand(edge = seq_len(weight)) %>%
  select(-edge)
head(gotdf.el)
## # A tibble: 6 x 2
## # Groups: Source, Target [2]
##   Source     Target      
##   <chr>      <chr>       
## 1 Arya-Stark Benjen-Stark
## 2 Arya-Stark Benjen-Stark
## 3 Arya-Stark Benjen-Stark
## 4 Arya-Stark Bran-Stark  
## 5 Arya-Stark Bran-Stark  
## 6 Arya-Stark Bran-Stark

# graph_from_edgelist() needs a matrix, so convert the tibble first.
got1 <- graph_from_edgelist(as.matrix(gotdf.el), directed = FALSE)
got1
## IGRAPH dfae6d2 UN-- 99 3374 -- 
## + attr: name (v/c)
## + edges from dfae6d2 (vertex names):
##  [1] Arya-Stark--Benjen-Stark  Arya-Stark--Benjen-Stark 
##  [3] Arya-Stark--Benjen-Stark  Arya-Stark--Bran-Stark   
##  [5] Arya-Stark--Bran-Stark    Arya-Stark--Bran-Stark   
##  [7] Arya-Stark--Bran-Stark    Arya-Stark--Bran-Stark   
##  [9] Arya-Stark--Bran-Stark    Arya-Stark--Bran-Stark   
## [11] Arya-Stark--Bran-Stark    Arya-Stark--Bran-Stark   
## [13] Arya-Stark--Bran-Stark    Arya-Stark--Bran-Stark   
## [15] Arya-Stark--Bran-Stark    Arya-Stark--Bran-Stark   
## + ... omitted several edges
# Small gold vertices, small black labels offset from the vertex, and
# slightly curved edges.
plot(
  got1,
  edge.arrow.size = .5,
  vertex.color = "gold",
  vertex.size = 3,
  vertex.frame.color = "gray",
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 2,
  edge.curved = 0.2
)

Simplify the network

# Toy edge list in which the edge foo -> bar appears twice (a multi-edge).
# `ncol` is spelled out in full instead of relying on partial argument
# matching (`nc`).
el <- matrix(
  c("foo", "bar", "foo", "bar", "bar", "foobar"),
  ncol = 2, byrow = TRUE
)
graph_from_edgelist(el) %>% plot()

# Give every parallel edge weight 1, then merge duplicate edges and sum their
# weights, so each merged edge carries the number of edges it replaced.
# TRUE/FALSE are written out: T and F are ordinary variables that can be
# reassigned.
E(got1)$weight <- rep(1, ecount(got1))
got1s <- igraph::simplify(
  got1,
  remove.multiple = TRUE,
  remove.loops = FALSE,
  edge.attr.comb = c(weight = "sum")
)
plot(
  got1s,
  edge.arrow.size = .5,
  vertex.color = "gold",
  vertex.size = 3,
  vertex.frame.color = "gray",
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 2,
  edge.curved = 0.5,
  layout = layout_with_lgl
)

Short name

library(stringr)
# Keep only the part of each vertex name before the first "-".
nameshort <- str_split(V(got1s)$name, "-", simplify = TRUE)[, 1]
V(got1s)$name[1:3]
## [1] "Arya-Stark"   "Benjen-Stark" "Bran-Stark"
nameshort[1:3]
## [1] "Arya"   "Benjen" "Bran"
# Replace the vertex names with the short versions and redraw.
V(got1s)$name <- nameshort
plot(
  got1s,
  edge.arrow.size = .5,
  vertex.color = "gold",
  vertex.size = 3,
  vertex.frame.color = "gray",
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 2,
  edge.curved = 0.5,
  layout = layout_with_lgl
)

1.3 graph_from_data_frame()

Most common and useful.

d: a data frame containing a symbolic edge list in the first two columns. Additional columns are considered as edge attributes.

vertices: A data frame with vertex metadata

head(gotdf, n = 5)
##   X     Source           Target       Type weight book source.family
## 1 1 Arya-Stark     Benjen-Stark Undirected      3    1         Stark
## 2 2 Arya-Stark       Bran-Stark Undirected     14    1         Stark
## 3 3 Arya-Stark    Catelyn-Stark Undirected      5    1         Stark
## 4 4 Arya-Stark Cersei-Lannister Undirected     12    1         Stark
## 5 5 Arya-Stark          Desmond Undirected      3    1         Stark
##   target.family
## 1         Stark
## 2         Stark
## 3         Stark
## 4     Lannister
## 5          <NA>
# Drop the X column so that Source and Target are the first two columns;
# the remaining columns end up as edge attributes (see the printed graph).
gotdf <- select(gotdf, -X)
got2 <- graph_from_data_frame(d = gotdf, directed = FALSE)
got2
## IGRAPH 2d0b738 UNW- 99 238 -- 
## + attr: name (v/c), Type (e/c), weight (e/n), book (e/n),
## | source.family (e/c), target.family (e/c)
## + edges from 2d0b738 (vertex names):
##  [1] Arya-Stark--Benjen-Stark       Arya-Stark--Bran-Stark        
##  [3] Arya-Stark--Catelyn-Stark      Arya-Stark--Cersei-Lannister  
##  [5] Arya-Stark--Desmond            Arya-Stark--Eddard-Stark      
##  [7] Arya-Stark--Ilyn-Payne         Arya-Stark--Jeyne-Poole       
##  [9] Arya-Stark--Joffrey-Baratheon  Arya-Stark--Jon-Snow          
## [11] Arya-Stark--Jory-Cassel        Arya-Stark--Meryn-Trant       
## [13] Arya-Stark--Mordane            Arya-Stark--Mycah             
## + ... omitted several edges
# Same styling as the earlier plots, drawn with the LGL layout.
plot(
  got2,
  edge.arrow.size = .5,
  vertex.color = "gold",
  vertex.size = 3,
  vertex.frame.color = "gray",
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 2,
  edge.curved = 0.5,
  layout = layout_with_lgl
)

get dataframe, matrix or edgelist from igraph object

# Edges with their attributes as a data frame (igraph:: prefix because dplyr
# also exports an as_data_frame()).
head(igraph::as_data_frame(got2), 2)
##         from           to       Type weight book source.family
## 1 Arya-Stark Benjen-Stark Undirected      3    1         Stark
## 2 Arya-Stark   Bran-Stark Undirected     14    1         Stark
##   target.family
## 1         Stark
## 2         Stark
# Adjacency matrix of the graph.
head(as_adjacency_matrix(got2), 2)
## [1] 0 1
# Two-column edge list of vertex names.
head(as_edgelist(got2), 2)
##      [,1]         [,2]          
## [1,] "Arya-Stark" "Benjen-Stark"
## [2,] "Arya-Stark" "Bran-Stark"

read_graph, write_graph

# Write the graph as a plain-text edge list and read it back. As the output
# below shows, this format keeps only the edges: vertex names and all
# attributes are gone after the round trip.
write_graph(graph = got2, file = "g.txt", format = "edgelist")
read_graph(file = "g.txt", format = "edgelist", directed = FALSE)
## IGRAPH b32ac5a U--- 99 238 -- 
## + edges from b32ac5a:
##   [1] 1-- 2 1-- 3 1-- 5 1-- 6 1-- 7 1--12 1--13 1--14 1--17 1--18 1--19
##  [12] 1--20 1--21 1--22 1--23 1--24 1--25 1--26 1--27 1--28 1--29 1--30
##  [23] 1--31 1--32 1--33 1--34 1--35 2-- 3 2-- 6 2--13 2--15 2--21 2--28
##  [34] 2--35 2--36 2--37 2--38 2--39 2--40 2--41 3-- 5 3-- 6 3-- 7 3--12
##  [45] 3--13 3--14 3--15 3--20 3--21 3--22 3--27 3--28 3--29 3--33 3--35
##  [56] 3--37 3--38 3--40 3--42 3--43 3--44 3--45 3--46 3--47 3--48 3--49
##  [67] 3--50 3--51 3--52 3--53 4-- 7 4--11 4--27 4--28 4--52 5-- 6 5-- 7
##  [78] 5-- 8 5--12 5--13 5--14 5--15 5--16 5--20 5--21 5--27 5--28 5--29
##  [89] 5--38 5--40 5--43 5--46 5--51 5--54 5--55 5--56 5--57 5--58 5--59
## + ... omitted several edges
# Pajek format keeps more than the plain edge list: the edge weights survive
# the round trip, although (per the output below) vertex names and the other
# attributes do not.
write_graph(graph = got2, file = "gg", format = "pajek")
read_graph(file = "gg", format = "pajek")
## IGRAPH fcd4a42 U-W- 99 238 -- 
## + attr: weight (e/n)
## + edges from fcd4a42:
##  [1] 1-- 2 1-- 3 1-- 5 1-- 6 1--17 1-- 7 1--18 1--19 1--20 1--21 1--22
## [12] 1--23 1--24 1--25 1--26 1--27 1--12 1--13 1--28 1--29 1--30 1--14
## [23] 1--31 1--32 1--33 1--34 1--35 2-- 3 2-- 6 2--36 2--37 2--21 2--38
## [34] 2--39 2--13 2--28 2--40 2--15 2--41 2--35 3-- 5 3-- 6 3-- 7 3--42
## [45] 3--43 3--44 3--45 3--37 3--20 3--46 3--21 3--22 3--47 3--38 3--48
## [56] 3--49 3--27 3--50 3--51 3--52 3--12 3--13 3--28 3--29 3--14 3--53
## [67] 3--40 3--33 3--15 3--35 4-- 7 4--11 4--27 4--52 4--28 5-- 6 5--54
## [78] 5--55 5-- 7 5--56 5--57 5--43 5--58 5-- 8 5--20 5--46 5--21 5--59
## + ... omitted several edges
# For comparison: the in-memory graph still has its vertex names and all
# edge attributes.
got2
## IGRAPH 2d0b738 UNW- 99 238 -- 
## + attr: name (v/c), Type (e/c), weight (e/n), book (e/n),
## | source.family (e/c), target.family (e/c)
## + edges from 2d0b738 (vertex names):
##  [1] Arya-Stark--Benjen-Stark       Arya-Stark--Bran-Stark        
##  [3] Arya-Stark--Catelyn-Stark      Arya-Stark--Cersei-Lannister  
##  [5] Arya-Stark--Desmond            Arya-Stark--Eddard-Stark      
##  [7] Arya-Stark--Ilyn-Payne         Arya-Stark--Jeyne-Poole       
##  [9] Arya-Stark--Joffrey-Baratheon  Arya-Stark--Jon-Snow          
## [11] Arya-Stark--Jory-Cassel        Arya-Stark--Meryn-Trant       
## [13] Arya-Stark--Mordane            Arya-Stark--Mycah             
## + ... omitted several edges

2. Visualization

2. Visualization

  • Plotting parameters: mapping important attributes to visual properties
  • Find a good layout
# Open the igraph help page on plotting parameters.
?igraph.plotting

2.1 Plotting parameters

# Baseline plot: every vertex looks the same.
plot(
  got2,
  vertex.color = "gold",
  vertex.size = 3,
  vertex.frame.color = "gray",
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 2,
  edge.curved = 0.5,
  layout = layout_with_lgl
)

To make the graph look nicer

  • Node color: using family name
  • Node size: degree
  • Edge width: weight
# Keep the original "First-Family" names before the vertex names are shortened.
fullnames <- V(got2)$name
fullnames[1:3]
## [1] "Arya-Stark"   "Benjen-Stark" "Bran-Stark"
# Family name = second "-"-separated piece; an empty piece or the "(guard)"
# tag is treated as having no family.
familynames <- str_split(fullnames, "-", simplify = TRUE)[, 2]
familynames[familynames %in% c("", "(guard)")] <- "None"
# Store both as vertex attributes.
V(got2)$familyname <- familynames
V(got2)$fullname <- fullnames
# First name = first piece; it becomes the (shorter) vertex name.
firstnames <- str_split(fullnames, "-", simplify = TRUE)[, 1]
V(got2)$name <- firstnames

Set colors and legend.

  • pch: plotting symbols appearing in the legend
  • pt.bg: background color for point
  • cex: text size
  • pt.cex: point size
  • ncol: number of columns of the legend
  • bty: "o"– rectangle box; "n" – no box
# Vertex colour by family: Stark -> tomato, Lannister -> gold, anything else
# (including a missing family) -> gray50.
vcol <- rep("gray50", vcount(got2))
vcol[V(got2)$familyname %in% "Stark"] <- "tomato"
vcol[V(got2)$familyname %in% "Lannister"] <- "gold"
V(got2)$color <- vcol
# Vertex size grows with the log of the degree. log1p() is used instead of
# log() so that degree-1 vertices do not get size 0 (and vanish from the
# plot) and an isolated vertex does not get size -Inf.
V(got2)$size <- log1p(degree(got2)) * 4
# Edge width grows with the log of the edge weight.
E(got2)$width <- log(E(got2)$weight) / 2
plot(
  got2,
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 1,
  edge.curved = 0.5,
  layout = layout_with_dh
)
# Legend entries use filled circles (pch = 21) in the same three colours.
legend(
  "right",
  legend = c("Stark", "Lannister", "Other"),
  pch = 21,
  col = c("tomato", "gold", "gray50"),
  pt.bg = c("tomato", "gold", "gray50"),
  pt.cex = 1, cex = .8, bty = "n", ncol = 1
)

Plot only labels of the nodes

# vertex.shape = "none" hides the vertex symbols so only the labels are drawn.
plot(
  got2,
  vertex.shape = "none",
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 2,
  edge.curved = 0.5,
  layout = layout_with_kk
)

2.2 Layouts

Force-directed layouts: suitable for general, small to medium sized graphs. (computational complexity; based on physical analogies)

  • layout_with_fr: Fruchterman-Reingold is one of the most used force-directed layout algorithms. Force-directed layouts try to get a nice-looking graph where edges are similar in length and cross each other as little as possible. As a result, nodes are evenly distributed through the chart area, and the layout is intuitive in that nodes which share more connections are closer to each other.
  • layout_with_kk: Another popular force-directed algorithm that produces nice results for connected graphs is Kamada Kawai.
  • layout_with_graphopt: …

For large graphs:

  • layout_with_lgl: The LGL algorithm is meant for large, connected graphs. Here you can also specify a root: a node that will be placed in the middle of the layout.
  • layout_with_drl:
  • layout_with_gfr:

  • layout_with_dh:simulated annealing algorithm by Davidson and Harel
# Davidson-Harel layout, with the family-colour legend.
plot(
  got2,
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 0.2,
  edge.curved = 0.5,
  layout = layout_with_dh
)
legend(
  "right",
  legend = c("Stark", "Lannister", "Other"),
  pch = 21,
  col = c("tomato", "gold", "gray50"),
  pt.bg = c("tomato", "gold", "gray50"),
  pt.cex = 1, cex = .8, bty = "n", ncol = 1
)

Selecting a layout automatically

  • connected and vcount<=100: kk
  • vcount<=1000:fr
  • else: drl
# Let igraph choose the layout algorithm. layout_nicely() is the current name
# of the deprecated layout.auto().
plot(
  got2,
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 0.2,
  edge.curved = 0.5,
  layout = layout_nicely(got2)
)

Show only the labels (no vertex shapes), then color the edges.

# Seed fixed so this layout can be reproduced by the coloured version below.
set.seed(2)
plot(
  got2,
  vertex.shape = "none",
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 0.2,
  edge.curved = 0.5,
  layout = layout_with_dh
)

# Colour the edges: first print the graph to see which edge attributes
# (source.family, target.family, ...) are available.
got2
## IGRAPH 2d0b738 UNW- 99 238 -- 
## + attr: name (v/c), familyname (v/c), fullname (v/c), color (v/c),
## | size (v/n), Type (e/c), weight (e/n), book (e/n), source.family
## | (e/c), target.family (e/c), width (e/n)
## + edges from 2d0b738 (vertex names):
##  [1] Arya--Benjen   Arya--Bran     Arya--Catelyn  Arya--Cersei  
##  [5] Arya--Desmond  Arya--Eddard   Arya--Ilyn     Arya--Jeyne   
##  [9] Arya--Joffrey  Arya--Jon      Arya--Jory     Arya--Meryn   
## [13] Arya--Mordane  Arya--Mycah    Arya--Myrcella Arya--Petyr   
## [17] Arya--Rickon   Arya--Robb     Arya--Robert   Arya--Rodrik  
## [21] Arya--Sandor   Arya--Sansa    Arya--Syrio    Arya--Tomard  
## + ... omitted several edges
# Edge colour from the families at both ends:
#   source Stark      -> tomato
#   source Lannister  -> gold
#   Stark source with Lannister target, or the reverse -> orange
#   everything else   -> gray50
# %in% is FALSE for NA, so edges with a missing family keep their colour.
ecol <- rep("gray50", ecount(got2))
ecol[E(got2)$source.family %in% "Stark"] <- "tomato"
ecol[E(got2)$source.family %in% "Lannister"] <- "gold"
ecol[
  (ecol == "tomato" & E(got2)$target.family %in% "Lannister") |
    (ecol == "gold" & E(got2)$target.family %in% "Stark")
] <- "orange"

set.seed(2)
plot(
  got2,
  vertex.shape = "none",
  vertex.label.color = "black",
  edge.color = ecol,
  vertex.label.cex = .5,
  vertex.label.dist = 0.2,
  edge.curved = 0.5,
  layout = layout_with_dh
)
# Line legend (lty = 1), one entry per edge colour.
legend(
  "right",
  legend = c("Stark", "Lannister", "Stark-Lannister", "Other"),
  col = c("tomato", "gold", "orange", "gray50"),
  lty = rep(1, 4), cex = .8, bty = "n", ncol = 1
)

layout is not deterministic

Different runs will result in slightly different configurations. Saving the layout or set.seed allows us to get the exact same result multiple times, which can be helpful if you want to plot the time evolution of a graph, or different relationships – and want nodes to stay in the same place in multiple plots.

# Compute the layout once (seeded) and keep the coordinates in `l`, so the
# same vertex positions can be reused across plots.
set.seed(1)
l <- layout_with_dh(got2)
plot(
  got2,
  vertex.shape = "none",
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 0.2,
  edge.curved = 0.5,
  layout = l
)

rescale

  • norm_coords
  • rescale=F
  • can use layout=l*2
# Fruchterman-Reingold coordinates (no seed is set here, so they vary
# between runs).
l <- layout_with_fr(got2)
# Normalise the coordinates into [-1, 1] on both axes ourselves ...
l <- norm_coords(l, ymin = -1, ymax = 1, xmin = -1, xmax = 1)
# ... and pass rescale = FALSE so plot() uses them as given. TRUE/FALSE are
# written out because T and F can be reassigned.
plot(
  got2,
  vertex.shape = "none",
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 0.2,
  edge.curved = 0.5,
  layout = l,
  rescale = FALSE
)

Will introduce interactive r packages next time.

# Draw the same layout at four scale factors in a 2 x 2 grid without margins.
# par() returns the previous settings; they are restored afterwards so later
# plots are not stuck in the 2 x 2, zero-margin configuration.
old_par <- par(mfrow = c(2, 2), mar = c(0, 0, 0, 0))
for (layout_scale in c(0.5, 0.8, 1, 2)) {
  plot(
    got2,
    vertex.shape = "none",
    vertex.label.color = "black",
    vertex.label.cex = .5,
    vertex.label.dist = 0.2,
    edge.curved = 0.5,
    layout = l * layout_scale,
    rescale = FALSE
  )
}
par(old_par)

#dev.off()

3. Network and node descriptions

  • Density: edge_density
  • Degree: degree
  • centrality and centralization:
    • centr_degree
    • closeness, centr_clo
    • eigen_centrality, centr_eigen
    • betweenness, edge_betweenness, centr_betw
  • reciprocity,transitivity,diameter,…

Density

The proportion of present edges from all possible ties.

edge_density(got2, loops = FALSE)
## [1] 0.04906205
# The same value by hand for an undirected network without self-loops:
# edges divided by the n * (n - 1) / 2 possible vertex pairs.
ecount(got2) / (vcount(got2) * (vcount(got2) - 1)) * 2
## [1] 0.04906205

Node degrees

'degree' has a mode of 'in' for in-degree, 'out' for out-degree, and 'all' or 'total' for total degree.

Note that this graph is undirected, so every mode gives the same result.

deg <- degree(got2, mode = "all")
# Breaks at 0, 1, ..., vcount - 1. The sequence is written explicitly: the
# original `1:n - 1` only works because it parses as `(1:n) - 1`.
hist(deg, breaks = 0:(vcount(got2) - 1), main = "Histogram of node degree")

# Cumulative degree distribution, one value per degree 0..max(deg).
deg.dist <- degree_distribution(got2, cumulative = TRUE, mode = "all")
plot(
  x = 0:max(deg), y = 1 - deg.dist,
  pch = 19, cex = 1.2, col = "orange",
  xlab = "Degree", ylab = "Cumulative Frequency"
)

centrality and centralization

Who is the most important character?

  • Degree
  • Closeness
  • Eigenvector
  • Betweenness

Degree (number of ties).

Normalization should be the max degree the network can get

# Five best-connected characters.
degree(got2, mode = "in", loops = FALSE) %>%
  sort(decreasing = TRUE) %>%
  .[1:5]
##  Eddard Catelyn    Bran    Arya  Cersei 
##      56      41      32      27      27
# The network is undirected, so the choice of mode does not matter.
# $res holds the per-vertex scores (here the same raw degrees as above).
centr_degree(got2, mode = "in", normalized = TRUE, loops = FALSE)$res %>%
  sort(decreasing = TRUE) %>%
  .[1:5]
## [1] 56 41 32 27 27
centr_degree(got2, mode = "all", normalized = TRUE, loops = FALSE)$res %>%
  sort(decreasing = TRUE) %>%
  .[1:5]
## [1] 56 41 32 27 27
# The theoretical maximum used for normalisation depends on whether
# self-loops are allowed, so pay attention to the `loops` setting.
centr_degree(got2, mode = "all", normalized = TRUE, loops = FALSE)$theoretical_max
## [1] 9506
centr_degree(got2, mode = "in", normalized = TRUE, loops = FALSE)$theoretical_max
## [1] 9506
centr_degree(got2, mode = "in", normalized = TRUE, loops = TRUE)$theoretical_max
## [1] 9702

Closeness (centrality based on distance to others in the graph): the inverse of the node's average geodesic distance to others in the network.

# Decide whether edge weights should count. If the graph has a `weight` edge
# attribute it is taken into account automatically; weights = NA ignores it.
closeness(got2, mode = "all", weights = NA) %>%
  sort(decreasing = TRUE) %>%
  .[1:5]
##      Eddard     Catelyn        Bran        Arya      Cersei 
## 0.006993007 0.006329114 0.006097561 0.005882353 0.005847953
# Weighted version (uses the `weight` attribute).
closeness(got2, mode = "all") %>%
  sort(decreasing = TRUE) %>%
  .[1:5]
##       Eddard      Catelyn      Rickard         Bran         Arya 
## 0.0010245902 0.0010141988 0.0010080645 0.0010030090 0.0009852217
# Per-vertex scores from the centralization helper.
centr_clo(got2, mode = "all", normalized = TRUE)$res %>%
  sort(decreasing = TRUE) %>%
  .[1:5]
## [1] 0.6853147 0.6202532 0.5975610 0.5764706 0.5730994

Eigenvector (centrality proportional to the sum of connection centralities): values of the first eigenvector of the graph adjacency matrix.

# Unweighted eigenvector centrality (weights = NA ignores the edge weights).
eigen_centrality(got2, directed = FALSE, weights = NA)$vector %>%
  sort(decreasing = TRUE) %>%
  .[1:5]
##    Eddard   Catelyn      Bran    Cersei      Arya 
## 1.0000000 0.8163499 0.7410532 0.7276696 0.6740883
# Weighted version: the ranking changes noticeably.
eigen_centrality(got2, directed = FALSE)$vector %>%
  sort(decreasing = TRUE) %>%
  .[1:5]
##    Eddard    Robert    Cersei   Catelyn     Petyr 
## 1.0000000 0.8538947 0.4281666 0.3352669 0.2441671
# Per-vertex scores from the centralization helper; the printed values match
# the unweighted result above.
centr_eigen(got2, directed = FALSE, normalized = TRUE)$vector %>%
  sort(decreasing = TRUE) %>%
  .[1:5]
## [1] 1.0000000 0.8163499 0.7410532 0.7276696 0.6740883

Betweenness

The betweenness centrality of a vertex is the number of shortest paths between pairs of other vertices that pass through that vertex.

# Unweighted betweenness (weights = NA ignores the edge weights).
betweenness(got2, directed = FALSE, weights = NA) %>%
  sort(decreasing = TRUE) %>%
  .[1:5]
##    Eddard   Catelyn      Bran      Arya      Robb 
## 2155.2656 1554.1678  915.6561  510.5637  366.8074
# Weighted version (uses the `weight` attribute).
betweenness(got2, directed = FALSE) %>%
  sort(decreasing = TRUE) %>%
  .[1:5]
##    Eddard   Catelyn      Bran    Benjen      Arya 
## 1835.5000 1483.2500 1024.8571  694.4762  689.5833
# Per-vertex scores from the centralization helper; the printed values match
# the unweighted result above.
centr_betw(got2, directed = FALSE, normalized = TRUE)$res %>%
  sort(decreasing = TRUE) %>%
  .[1:5]
## [1] 2155.2656 1554.1678  915.6561  510.5637  366.8074

The edge betweenness centrality is defined as the number of the shortest paths that go through an edge in a graph or network. (result will be different from betweenness)

# Graph size for reference: edge_betweenness() returns one value per edge.
ecount(got2)
## [1] 238
vcount(got2)
## [1] 99
# Unweighted edge betweenness (weights = NA ignores the edge weights).
edge_betweenness(got2, directed = FALSE, weights = NA)
##   [1]  66.554021  68.525794 150.037085  13.814394  98.000000 121.211633
##   [7]  24.635714  98.000000  11.431061   9.783333  15.508838  49.217460
##  [13]  24.635714  35.066667  49.217460  12.907684  12.391667  44.578680
##  [19]  20.452128  12.697727  22.188203  13.000505  35.066667  28.935714
##  [25]  29.952381  24.635714  26.681061  70.556799  64.249636  98.000000
##  [31]  18.932937  22.647708  24.715132  98.000000  52.684557  37.586180
##  [37]  23.408819  42.711613  98.000000   6.924242 198.337900  51.773939
##  [43] 271.698162  98.000000  45.416667  98.000000  98.000000  79.067063
##  [49]  16.964379  19.564379  14.657576  22.281046  98.000000  18.458333
##  [55]  98.000000  98.000000  18.867193  26.350093  23.685808  82.607296
##  [61]  15.350000  50.993347  30.264018  16.197712  25.792590  55.504545
##  [67]  16.402020  37.202381  36.646233  28.146970  75.291508   3.000000
##  [73]  13.628737  20.293570  21.043888 106.273882  98.000000  98.000000
##  [79] 426.464286  41.333333  98.000000  52.583333  98.000000  56.183210
##  [85]  24.066667  25.283333  32.160101  98.000000  35.279040  36.998268
##  [91]  98.000000  98.000000  98.000000  98.000000  98.000000  98.000000
##  [97]  30.028571  26.904762  21.966667  84.686616  98.000000  62.544517
## [103]  22.750000  38.054762  98.000000  32.487879  36.546573  98.000000
## [109]  30.083009  63.476984  25.974675  51.054762  59.298413  63.476984
## [115]  62.814663  22.635714  17.076012  34.566667   8.633333  12.033333
## [121]   8.406061  12.805556  48.782540  22.635714  48.782540   9.352381
## [127]  34.566667  17.426190  15.699206  18.600000   8.537302  27.977381
## [133]  30.845238  18.084560  18.671861  14.109957  22.635714  56.666667
## [139]  98.000000  98.000000  98.000000  98.000000  98.000000  98.000000
## [145]  98.000000  50.728571  98.000000  36.269415  63.433333  31.266667
## [151]  34.616667  42.525486  37.000000  98.000000  73.352564  51.727758
## [157]  58.311905  49.831602  98.000000  50.728571  62.933333  29.719048
## [163]  98.000000  63.433333  98.000000  41.835714  98.000000  39.571429
## [169]  33.133333 133.863545  64.007520  32.666667  43.892857  26.659524
## [175]  53.520238  62.933333  49.436597  98.000000  53.578571  65.933333
## [181]  98.000000  79.285803  52.631818  98.000000  35.474675  50.728571
## [187]  73.352564  98.000000   7.662106   7.731046  12.428773  35.962121
## [193]  24.647436  11.170130   8.206644  13.616760   9.066760  24.226842
## [199]  15.311783   7.645440  14.114394   7.746630  10.860788   6.159524
## [205]   6.174242  24.647436  40.509091  21.528788  98.000000  19.935714
## [211]  18.252381   7.550000   2.950000   5.575000  16.552814   8.066667
## [217]  98.000000  98.000000  12.912338  42.495455   8.444949  32.066667
## [223]  12.542532  13.466342  32.300000  38.701587  19.481818  16.502381
## [229]  15.485714   7.276190   5.867532   6.376190  34.523016   8.117532
## [235]  34.523016  17.682576   2.567100  14.645238

Other properties

  • transitivity
  • reciprocity
  • clustering coefficient

Exercise

Exercise

  • Download the dataset from https://github.com/mathbeveridge/asoiaf and build the network for book 3.
  • Only keep the nodes with degree over 5.
  • Only keep the nodes connected to family "Stark", "Targaryen" and "Lannister".
  • Present the network nicely
  • List the top 5 most important characters.